From 1f5b281382b8511481bdd1ac0baef417a32779fb Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= Date: Sat, 31 Mar 2012 14:58:49 +0100 Subject: [PATCH] extensions: add an extension with some float optimizations Alos improve some alpha unpremultiplications in other extensions. --- extensions/Makefile.am | 4 + extensions/float.c | 324 +++++++++++++++++++++++++++++++++++++++ extensions/gegl-fixups.c | 6 +- extensions/gggl.c | 3 +- 4 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 extensions/float.c diff --git a/extensions/Makefile.am b/extensions/Makefile.am index fac0123..c017046 100644 --- a/extensions/Makefile.am +++ b/extensions/Makefile.am @@ -6,6 +6,7 @@ endif noinst_HEADERS = util.h + AM_CPPFLAGS = \ -I$(top_builddir) \ -I$(top_srcdir) \ @@ -20,6 +21,7 @@ ext_LTLIBRARIES = \ gggl-lies.la \ gggl.la \ gimp-8bit.la \ + float.la \ naive-CMYK.la \ sse-fixups.la @@ -31,7 +33,9 @@ gggl_la_SOURCES = gggl.c gimp_8bit_la_SOURCES = gimp-8bit.c naive_CMYK_la_SOURCES = naive-CMYK.c sse_fixups_la_SOURCES = sse-fixups.c +float_la_SOURCES = float.c LIBS = $(top_builddir)/babl/libbabl-@BABL_API_VERSION@.la $(MATH_LIB) sse_fixups_la_CFLAGS = $(MMX_EXTRA_CFLAGS) $(SSE_EXTRA_CFLAGS) +float_la_CFLAGS = -std=c99 diff --git a/extensions/float.c b/extensions/float.c new file mode 100644 index 0000000..4fc7f98 --- /dev/null +++ b/extensions/float.c @@ -0,0 +1,324 @@ +/* babl - dynamically extendable universal pixel conversion library. + * Copyright (C) 2012, Øyvind Kolås + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General + * Public License along with this library; if not, see + * . + */ + +#include "config.h" + +#include +#include + +#include "babl.h" +#include "babl-cpuaccel.h" +#include "extensions/util.h" +#include "base/util.h" + +#define INLINE inline + + +/* + optimized powf from: + +http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent + + by David Hammen +*/ + +// Returns x^(5/12) for x in [1,2), to within 3e-8 (relative error). +// Want more precision? Add more Chebychev polynomial coefs. +static INLINE double pow512norm ( + double x) +{ + static const int N = 8; + + // Chebychev polynomial terms. + // Non-zero terms calculated via + // integrate (2/pi)*ChebyshevT[n,u]/sqrt(1-u^2)*((u+3)/2)^(5/12) + // from -1 to 1 + // Zeroth term is similar except it uses 1/pi rather than 2/pi. + static const double Cn[8] = { + 1.1758200232996901923, + 0.16665763094889061230, + -0.0083154894939042125035, + 0.00075187976780420279038, + // Wolfram alpha doesn't want to compute the remaining terms + // to more precision (it times out). + -0.0000832402, + 0.0000102292, + -1.3401e-6, + 1.83334e-7}; + + double Tn[N]; + + double u = 2.0*x - 3.0; + int i; + double y = 0.0; + + Tn[0] = 1.0; + Tn[1] = u; + for (i = 2; i < N; ++i) { + Tn[i] = 2*u*Tn[i-1] - Tn[i-2]; + } + + for (i = N-1; i >= 0; --i) { + y += Cn[i]*Tn[i]; + } + + return y; +} + +// Returns x^(5/12) to within 3e-8 (relative error). +static INLINE double pow512 ( + double x) +{ + static const double pow2_512[12] = { + 1.0, + pow(2.0, 5.0/12.0), + pow(4.0, 5.0/12.0), + pow(8.0, 5.0/12.0), + pow(16.0, 5.0/12.0), + pow(32.0, 5.0/12.0), + pow(64.0, 5.0/12.0), + pow(128.0, 5.0/12.0), + pow(256.0, 5.0/12.0), + pow(512.0, 5.0/12.0), + pow(1024.0, 5.0/12.0), + pow(2048.0, 5.0/12.0) + }; + + double s; + int iexp; + + s = frexp (x, &iexp); + s *= 2.0; + iexp -= 1; + + div_t qr = div (iexp, 12); + if (qr.rem < 0) { + qr.quot -= 1; + qr.rem += 12; + } + + return ldexp (pow512norm(s)*pow2_512[qr.rem], 5*qr.quot); +} + +static inline double +flinear_to_gamma_2_2 (double value) +{ + if (value > 0.0030402477F) + return 1.055F * pow512 (value) - 0.055F; + return 12.92F * value; +} + +static INLINE long +conv_rgbaF_linear_rgbAF_gamma (unsigned char *src, + unsigned char *dst, + long samples) +{ + float *fsrc = (float *) src; + float *fdst = (float *) dst; + int n = samples; + + while (n--) + { + float alpha = fsrc[3]; + *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha; + *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha; + *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha; + *fdst++ = *fsrc++; + } + return samples; +} + +static INLINE long +conv_rgbAF_linear_rgbAF_gamma (unsigned char *src, + unsigned char *dst, + long samples) +{ + float *fsrc = (float *) src; + float *fdst = (float *) dst; + int n = samples; + + while (n--) + { + float alpha = fsrc[3]; + if (alpha < BABL_ALPHA_THRESHOLD) + { + *fdst++ = 0.0; + *fdst++ = 0.0; + *fdst++ = 0.0; + *fdst++ = 0.0; + fsrc+=4; + } + else if (alpha >= 1.0) + { + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = *fsrc++; + } + else + { + float alpha_recip = 1.0 / alpha; + *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha; + *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha; + *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha; + *fdst++ = *fsrc++; + } + } + return samples; +} + +static INLINE long +conv_rgbaF_linear_rgbaF_gamma (unsigned char *src, + unsigned char *dst, + long samples) +{ + float *fsrc = (float *) src; + float *fdst = (float *) dst; + int n = samples; + + while (n--) + { + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = *fsrc++; + } + return samples; +} + +static INLINE long +conv_rgbF_linear_rgbF_gamma (unsigned char *src, + unsigned char *dst, + long samples) +{ + float *fsrc = (float *) src; + float *fdst = (float *) dst; + int n = samples; + + while (n--) + { + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + *fdst++ = flinear_to_gamma_2_2 (*fsrc++); + } + return samples; +} + + +static INLINE long +conv_rgbaF_gamma_rgbaF_linear (unsigned char *src, + unsigned char *dst, + long samples) +{ + float *fsrc = (float *) src; + float *fdst = (float *) dst; + int n = samples; + + while (n--) + { + *fdst++ = gamma_2_2_to_linear (*fsrc++); + *fdst++ = gamma_2_2_to_linear (*fsrc++); + *fdst++ = gamma_2_2_to_linear (*fsrc++); + *fdst++ = *fsrc++; + } + return samples; +} + +static INLINE long +conv_rgbF_gamma_rgbF_linear (unsigned char *src, + unsigned char *dst, + long samples) +{ + float *fsrc = (float *) src; + float *fdst = (float *) dst; + int n = samples; + + while (n--) + { + *fdst++ = gamma_2_2_to_linear (*fsrc++); + *fdst++ = gamma_2_2_to_linear (*fsrc++); + *fdst++ = gamma_2_2_to_linear (*fsrc++); + } + return samples; +} + +#define o(src, dst) \ + babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL) + +int init (void); + +int +init (void) +{ + const Babl *rgbaF_linear = babl_format_new ( + babl_model ("RGBA"), + babl_type ("float"), + babl_component ("R"), + babl_component ("G"), + babl_component ("B"), + babl_component ("A"), + NULL); + const Babl *rgbAF_linear = babl_format_new ( + babl_model ("RaGaBaA"), + babl_type ("float"), + babl_component ("Ra"), + babl_component ("Ga"), + babl_component ("Ba"), + babl_component ("A"), + NULL); + const Babl *rgbaF_gamma = babl_format_new ( + babl_model ("R'G'B'A"), + babl_type ("float"), + babl_component ("R'"), + babl_component ("G'"), + babl_component ("B'"), + babl_component ("A"), + NULL); + const Babl *rgbAF_gamma = babl_format_new ( + babl_model ("R'aG'aB'aA"), + babl_type ("float"), + babl_component ("R'a"), + babl_component ("G'a"), + babl_component ("B'a"), + babl_component ("A"), + NULL); + const Babl *rgbF_linear = babl_format_new ( + babl_model ("RGB"), + babl_type ("float"), + babl_component ("R"), + babl_component ("G"), + babl_component ("B"), + NULL); + const Babl *rgbF_gamma = babl_format_new ( + babl_model ("R'G'B'"), + babl_type ("float"), + babl_component ("R'"), + babl_component ("G'"), + babl_component ("B'"), + NULL); + + o (rgbAF_linear, rgbAF_gamma); + o (rgbaF_linear, rgbAF_gamma); + o (rgbaF_linear, rgbaF_gamma); + o (rgbaF_gamma, rgbaF_linear); + o (rgbF_linear, rgbF_gamma); + o (rgbF_gamma, rgbF_linear); + + return 0; +} + diff --git a/extensions/gegl-fixups.c b/extensions/gegl-fixups.c index 3fb6977..7f3b8ff 100644 --- a/extensions/gegl-fixups.c +++ b/extensions/gegl-fixups.c @@ -473,9 +473,11 @@ conv_rgbAF_rgbaF (unsigned char *srcc, while (n--) { float alpha = src[3]; - float recip = 1.0/alpha; + float recip; if (alpha < BABL_ALPHA_THRESHOLD) recip = 0.0; + else + recip = 1.0/alpha; dst[0] = src[0] * recip; dst[1] = src[1] * recip; dst[2] = src[2] * recip; @@ -518,8 +520,6 @@ conv_rgbAF_lrgba8 (unsigned char *srcc, return samples; } - - #define conv_rgb8_rgbAF conv_rgb8_rgbaF int init (void); diff --git a/extensions/gggl.c b/extensions/gggl.c index 16d3276..6a007c6 100644 --- a/extensions/gggl.c +++ b/extensions/gggl.c @@ -610,9 +610,10 @@ conv_rgbAF_rgbaF (unsigned char *src, unsigned char *dst, long samples) } else { + float alpha_recip = 1.0 / alpha; for (c = 0; c < 3; c++) { - *(float *) dst = ((*(float *) src) / alpha); + *(float *) dst = ((*(float *) src) * alpha_recip); dst += 4; src += 4; } -- 2.30.2